/***********[std_wcnf.cc]
Copyright (c) 2020, Fahiem Bacchus

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

***********/
#include <fstream>
#include <iostream>
#include <stdexcept>
#include "zfstream.h"

#include "Wcnf.h"

using std::cerr;
using std::cin;
using std::cout;
using std::runtime_error;
using std::string;

void print_usage(const char* name) {
  // clang-format off
  cerr << "USAGE: " << name
       << " [-old] [-top-exact] [-preserve] [--] [wcnf_file] >std_wcnf_file\n"
       << "    reads wcnf_file and outputs standardized version to stdout\n"
       << "    no wcnf_file argument ==> read from stdin.\n"
       << "    NOTE: each clause in input must start a new line and be on one line only\n"
       << "\n"
       << "    [-old]       standardize into old format with p-line\n"
          "                 and hards having weight == top == (sum of soft wts + 1)\n"
       << "                 default is new format where hards clauses have no weight but have an\n"
       << "                 initial \'h\' character starting the line\n"
       << "    [-top-exact] regard an input clause to be hard iff its weight == top\n"
       << "                 as specified in file's pline. Otherwise the default is\n"
       << "                 clause is hard <==> its weight >= top.\n"

       << "    [-preserve]  Don't set mono-weighted instances to soft weight 1 nor\n"
          "                 remove gaps in variable ordering\n"
          "                 (library instances are modified in this way)\n"
       << "    [--]         double hyphen: regard very next argument as name of wcnf_file\n"
       << "                 irrespective of its format (e.g. file name can start with '-'\n"
       << "                 NOTE: '--' requires exactly one argument filename afterwards\n";
  // clang-format on
}

int main(int argc, char* argv[]) {
  /*
    Operate as filter converting a maxsat wcnf instance into
    a standardized wcnf. Input wcnf can be any of the old formats:
    a) p cnf nvars ncls
          treat all caluses as soft with unit weight. clauses do not
          have initial number specifying weight
    b) p wcnf nvars ncls
          treat all caluses as soft with weight specified as initial
          number in clause line
    c) p wcnf nvars ncls top (the recent standard).
          Treat all clauses with weight < top as soft. Every clause
          (hard and soft) has a weight specified as the initial number
          in clause line
    d) No pline (the new standard).
          All softs have an initial weight (the first number in the
          clause line) but hards are specified by a initial 'h'
          character on the line.

    The standard format has the following properties:
      1. Format (d) (no pline) is used
      2. one line per clause

      If -old is specified on the command line
      1. Format (c) is used
      2. top = sum of soft clause weights + 1 (with softs as defined above)
         NOTE. For some instances this is limiting as the sum of the soft
         clause weights can exceed max int64_t.
      3. all hards have the same weight = top
      4. one line per clause.

      In both new and old formats an initial comment section with json formatted
      stats for this instance is added to the file (strip initial line and
      intial character of each line up until the closing c} line):

       c Standarized MaxSat Instance
       c{
       c "sha1": "<sha1 code of file after removing all comment lines>",
       c "nvars": <number of variables>,
       c "ncls": <number of clauses>,
       c "total_lits": <sum of the sizes of all clauses (softs and hards)>
       c "nhards": <number of hard clauses>,
       c "hard_clause_stats":
       c    { "min": <min length>, "max": <max length>, "ave": <ave. length>,
       c      "stddev":  <standard deviation of lengths> },
       c "nsofts": <number of soft clauses>,
       c "soft_clause_stats":
       c    { "min": <min length>, "max": <max length>, "ave": <ave. length>,
       c      "stddev": <standard deviation of lengths> },
       c "nsoft_wts": <number of different weights of soft clauses>,
       c "weight_stats":
       c    { "min": <min weight>, "max": <max  weight>, "ave": <ave. weight>,
       c      "stddev": <standard deviation of weight> }
       c}
       c------------------------------------------------------------
       NOTE: an standardized instance can be passed back into the program...in
       this case the instance stats at the top of the file will updated and
       replaced (so only one json record exists at the top of the file.
   */
  std::ios::sync_with_stdio(false);
  cin.tie(0);
  cout.tie(0);
  cerr.tie(0);

  string file_name;
  bool use_exact_top{false};
  bool use_old_format{false};
  bool preserve{false};

  try {
    for (int i = 1; i < argc; ++i) {
      string arg;
      arg = argv[i];
      if (arg == "-help" || arg == "-h" || arg == "--help") {
        print_usage(argv[0]);
        exit(1);
      } else if (arg == "-old")
        use_old_format = true;
      else if (arg == "-top-exact")
        use_exact_top = true;
      else if (arg == "-preserve")
        preserve = true;
      else if (arg == "--") {
        if (i + 2 != argc)
          throw runtime_error(
              "-- on command line must be followed by exactly one file name "
              "argument\n");
        else {
          file_name = argv[i + 1];
          break;
        }
      } else if (arg[0] == '-')
        throw runtime_error("Unknown option \"" + arg + "\"\n");
      else {
        if (!file_name.empty())
          throw runtime_error("extranous argument \"" + arg + "\"\n");
        file_name = arg;
      }
    }
  } catch (std::runtime_error& ex) {
    cerr << "Error: bad arguments " << ex.what();
    print_usage(argv[0]);
    exit(1);
  } catch (...) {
    print_usage(argv[0]);
    exit(1);
  }

  gzifstream ifs;
  if (!file_name.empty()) {
    ifs.open(file_name, std::ios::in | std::ios::binary);
    if (!ifs.is_open()) {
      cerr << "Error: could not open file \"" << file_name << "\"\n";
      exit(1);
    }
  }
  try {
    Wcnf theFormula;
    theFormula.set_exact_top(use_exact_top);
    theFormula.set_old_fmt(use_old_format);
    theFormula.set_preserve(preserve);
    theFormula.loadFromStream(file_name.empty() ? cin : ifs);
    if (!file_name.empty()) ifs.close();
    theFormula.standardize();
    theFormula.output_stat_lines(cout);
    theFormula.outputInstance(cout);
  } catch (std::runtime_error& ex) {
    cerr << "Error: " << ex.what();
    return 1;
  } catch (...) {
    cerr << "Error: unknown";
    return 1;
  }
  return 0;
}
